

*	This code generates results for the following tables: Tables 1, 2 (Panels A,B, and C), 3, 4, 5, 6, 9, 10, 11, and 12;

options threads cpucount=8 sortsize=32G bufno=8;
dm log 'clear';

%let mainfolder=D:\Dropbox\Characteristics Factors\RFS_Replication\;

libname mylib "&mainfolder.\Data_Cleaned";

proc datasets library=work nolist kill;
run;
quit;

%let enddate='31DEC2023'd;

*macro to reorder factors so that the output csv is the same order as reported table;
%macro reorder(indata=);

data &indata;
set &indata;
if factor in ('smb',"smb_ff5") then id=1;
if factor in ('hml') then id=2;
if factor in ('rmw') then id=3;
if factor in ('cma') then id=4;
if factor in ('ia') then id=5;
if factor in ('roe') then id=6;
if factor in ('pmu') then id=7;
if factor in ('qmj') then id=8;
if factor in ('umd') then id=9;
if factor in ('mgmt') then id=10;
if factor in ('perf') then id=11;
if factor in ('fin') then id=12;
if factor in ('pead') then id=13;
if factor in ('avg') then id=14;
if factor in ('avg_nosmb') then id=15;
if factor in ('avg_nobf') then id=16;
if factor in ('avg_nobfq') then id=17;
if factor in ('avg_nosmbbf') then id=18;
if factor in ('avg_nosmbbfq') then id=19;
run;

proc sort data=&indata out=&indata(drop=id);
by id;
run;

%mend reorder;

*macro to reorder macro factors so that the output csv is the same order as reported table;
%macro reorder1(indata=);

data &indata;
set &indata;
if factor in ('con') then id=1;
if factor in ('ipg') then id=2;
if factor in ('term') then id=3;
if factor in ('def') then id=4;
if factor in ('ui') then id=5;
if factor in ('dei') then id=6;
if factor in ('vol') then id=7;
if factor in ('mkt') then id=8;
if factor in ('lab') then id=9;
if factor in ('tfp') then id=10;
if factor in ('avg1') then id=11;
if factor in ('avg2') then id=12;
if factor in ('avg3') then id=13;
run;

proc sort data=&indata out=&indata(drop=id);
by id;
run;

%mend reorder1;
****************************************************************************************;

*	T1 Panel A/C: correlation	;
proc datasets library=work kill nolist;
run;
quit;

proc sort data=mylib.allfactors out=factors;
by date;
where '01JUL1963'd<=date<=&enddate;
run;

proc sort data=factors;
by date;
run;
*	lagbw;
data bw;
set mylib.macro_monthly(keep=date bw);
where not missing(bw);
date=intnx('month',date,1,'e');
format date yymmddn8.;
keep date bw;
run;

proc sort data=bw;
by date;
run;
*	contemporaneous dbw;
data dbw;
set mylib.macro_monthly(keep=date dbw);
where not missing(dbw);
run;

proc sort data=dbw;
by date;
where not missing(dbw);
run;

data factors1;
merge factors(in=in1) bw dbw;
by date;
if in1;
run;
*	bw is from t-1, factors & dbw are from t	;
proc corr data=factors1 outp=a noprint;
var smb_ff5 hml rmw cma ia roe pmu qmj umd mgmt perf fin pead bw dbw;
run;

data a;
set a;
where _type_='CORR';
run;

proc export data=a outfile="&mainfolder.\Result\Table1_PanelAC.csv" replace;run;
*	T1 Panel B: factor return and capm alpha	;
%macro betaport(factor=,start=);

dm 'odsresults; clear';

proc sort data=factors(keep=date &factor mktrf) out=spread(rename=(&factor=excessret)) nodupkey;
by date;
where &start<=date<=&enddate and not missing(&factor) and not missing(mktrf);
run;

proc sort data=spread out=date(keep=date) nodupkey;
by date;
run;

data firstdate;
set date;
if _n_=1;
run;

proc sort data=date;
by descending date;
run;

data lastdate;
set date;
if _n_=1;
run;

ods output parameterestimates=excessret;
ods listing close;
proc model data=spread;
 endo excessret;
 instruments / intonly;
 parms b0;
 excessret=b0;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;

data excessret(drop=parameter rename=(estimate=exret tvalue=tret));
set excessret(keep=parameter estimate tvalue);
where parameter='b0';
run;

ods output parameterestimates=capmalpha;
ods listing close;
proc model data=spread;
 endo excessret;
 exog mktrf;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*mktrf;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

data capmalpha(drop=parameter rename=(estimate=alpha tvalue=talpha));
set capmalpha(keep=parameter estimate tvalue);
where parameter='b0';
run;

proc sql;
create table f 
as select distinct a.*,b.*
from excessret as a,capmalpha as b;
quit;

data f;
set f;
factor="&factor";
exret=exret*100;
alpha=alpha*100;
run;

proc sql;
create table f
as select distinct a.*,b.date as firstdate format=yymmddn8.
from f as a,firstdate as b;
quit;

proc sql;
create table f
as select distinct a.*,b.date as lastdate format=yymmddn8.
from f as a,lastdate as b;
quit;

proc append data=f base=table1 force;
run;

%mend betaport;

%macro all;

*dm log 'clear';

proc sql;
drop table table1;
quit;

data table1;
format factor $24.;
format exret 12.2;
format tret 8.2;
format alpha 12.2;
format talpha 8.2;
format firstdate yymmddn8.;
format lastdate yymmddn8.;
run;

%betaport(factor=smb_ff5,start='01JUL1963'd);
%betaport(factor=hml,start='01JUL1963'd);
%betaport(factor=rmw,start='01JUL1963'd);
%betaport(factor=cma,start='01JUL1963'd);
%betaport(factor=ia,start='01JAN1967'd);
%betaport(factor=roe,start='01JAN1967'd);
%betaport(factor=pmu,start='01JUL1963'd);
%betaport(factor=qmj,start='01JUL1963'd);
%betaport(factor=umd,start='01JUL1963'd);
%betaport(factor=mgmt,start='01JUL1963'd);
%betaport(factor=perf,start='01JUL1963'd);
%betaport(factor=fin,start='01JUL1972'd);
%betaport(factor=pead,start='01JUL1972'd);

proc export data=table1(where=(not missing(factor))) outfile="&mainfolder.\Result\Table1_PanelB.csv" replace;run;

%mend all;

%let lags=5;

%all;
**************************************************************************************************************;
**************************************************************************************************************;
**************************************************************************************************************;
**************************************************************************************************************;

*	Main stock tests	;
proc datasets library=work nolist kill;
run;
quit;
*	bw sentiment data starts from 196507, so I will just start the sample from 196507.;
*	dp dy ep de svar bm ltr tms dfy dfr + predict return, needs to create a negative version of 
the variable so that all variables negatively predict return	;
data macro(drop=date rename=(date1=date));
set mylib.macro_monthly;
where '01JUL1965'd<=date and year(date)<=2023;
date1=intnx('month',date,1,'e');*move to crsp return timing;
format date1 yymmddn8.;
newdp=-dp; 
newdy=-dy;
newep=-ep;
newde=-de;
newsvar=-svar;
newbm=-bm;
newltr=-ltr;
newtms=-tms;
newdfy=-dfy;
newdfr=-dfr; 
run;

proc sort data=macro;
by date;
run;
*	dbw contemporaneous with return;
proc sort data=mylib.macro_monthly(keep=date dbw) out=dbw;
by date;
where not missing(dbw);
run;

data macro;
merge macro(drop=dbw in=in1) dbw(in=in2);
by date;
if in1;
if date<=&enddate;
run;

data ret;
set mylib.crsp;
where year(date)>=1960;
if (dlstcd = 500 | dlstcd <= 584 and dlstcd >= 520) & hexcd = 3 & (dlret = .S | dlret = .T | dlret = .P) then Ddlret = -.55;
else if (dlstcd = 500 | dlstcd <= 584 and dlstcd >= 520) & (hexcd = 1 | hexcd =2 ) & (dlret = .S | dlret = .T | dlret = .P) then Ddlret = -.3;
else if dlret = .S | dlret = .T | dlret = .A | dlret = .P then Ddlret = .;
else DDLRET=DLRET;
if Dlstcd=. or dlstcd=100 THEN ret=rET; 
ELSE ret=ddlret;
date=intnx('month',date,0,'e');
format date yymmddn8.;
me=abs(prc)*shrout/1000;*ME in millions;
prc=abs(prc);
keep permno date me ret prc exchcd shrcd siccd;
Run;

proc sort data=ret nodupkey;
by _all_;
where not missing(me);
run;

proc sort data=ret nodupkey;
by permno date descending ret;
run;

proc sort data=ret nodupkey;
by permno date;
run;

data lagme(keep=permno date lagme lagprc lagexchcd lagshrcd lagsiccd);
set ret;
date=intnx('month',date,1,'e');
format date yymmddn8.;
rename me=lagme prc=lagprc exchcd=lagexchcd shrcd=lagshrcd siccd=lagsiccd;
run;

proc sort data=lagme nodupkey;
by permno date;
where not missing(permno) and not missing(date) and not missing(lagme);
run;

proc sort data=ret nodupkey;
by date permno;
run;

data ff3_factors;
set mylib.allfactors(keep=date rf mktrf smb hml);
date=intnx('month',date,0,'e');
format date yymmddn8.;
run;

proc sort data=ff3_factors;
by date;
run;

data macro;
merge macro(in=in1) ff3_factors(keep=date mktrf in=in2);
by date;
if in1 and in2;
run;

proc sort data=macro;
by date;
run;

data ret(drop=rf);
merge ret(keep=permno date ret in=in1) ff3_factors(keep=date rf in=in2);
by date;
if in1 and in2;
excessret=ret-rf;
run;

proc sort data=ret;
by permno date;
run;

data all;
merge lagme(in=in1) ret(keep=permno date excessret ret in=in2);
by permno date;
if in1;
run;
***********************************************************************************************************************;


/*	T2: Beta portfolios returns and ex post betas, unconditional. */
%macro betaport(factor=);

dm 'odsresults; clear';

proc datasets library=work nolist;
save macro all ff3_factors all1 beta table1 avgspread factors;
run;

%if &factor=umd or &factor=perf %then %do;

proc sort data=all1(keep=date permno lagme beta1_&factor excessret lagexchcd lagshrcd lagprc freq_&factor) 
out=port1(keep=date permno lagme beta1_&factor excessret lagexchcd rename=(lagexchcd=exchcd)) nodupkey;
by date permno;
where not missing(beta1_&factor) and '01JUL1966'd<=date<=&enddate and 1<=lagexchcd<=3 and 10<=lagshrcd<=11 and 
freq_&factor>=&minday;
run;

%end;

%else %if &factor=pead %then %do;

proc sort data=all1(keep=date permno lagme beta1_&factor excessret lagexchcd lagshrcd lagprc freq_&factor) 
out=port1(keep=date permno lagme beta1_&factor excessret lagexchcd rename=(lagexchcd=exchcd)) nodupkey;
by date permno;
where not missing(beta1_&factor) and '01JUL1975'd<=date<=&enddate and 1<=lagexchcd<=3 and 10<=lagshrcd<=11 and 
freq_&factor>=&minday;
run;

%end;

%else %do;

proc sort data=all1(keep=date permno lagme beta1_&factor excessret lagexchcd lagshrcd lagprc freq_&factor) 
out=port1(keep=date permno lagme beta1_&factor excessret lagexchcd rename=(lagexchcd=exchcd)) nodupkey;
by date permno;
where not missing(beta1_&factor) and '01JUL1966'd<=date<=&enddate and 1<=lagexchcd<=3 and 10<=lagshrcd<=11 and 
freq_&factor>=&minmonth;
run;

%end;

proc univariate data=port1(where=(not missing(beta1_&factor) and 1<=exchcd<=3)) noprint;
by date;
var beta1_&factor;
output out=brkpt pctlpre=P_ pctlpts= 10 to 90 by 10;
run;

proc sort data=brkpt;
by date;
run;

data port1(drop=P_10 P_20 P_30 P_40 P_50 P_60 P_70 P_80 P_90);
merge port1 brkpt;
by date;
if not(beta1_&factor=.) and beta1_&factor<P_10 then cgogroup=0;
if P_10<=beta1_&factor<P_20 then cgogroup=1;
if P_20<=beta1_&factor<P_30 then cgogroup=2;
if P_30<=beta1_&factor<P_40 then cgogroup=3;
if P_40<=beta1_&factor<P_50 then cgogroup=4;
if P_50<=beta1_&factor<P_60 then cgogroup=5;
if P_60<=beta1_&factor<P_70 then cgogroup=6;
if P_70<=beta1_&factor<P_80 then cgogroup=7;
if P_80<=beta1_&factor<P_90 then cgogroup=8;
if beta1_&factor>=P_90 then cgogroup=9;
run;

data port1;
set port1;
portfolio=cgogroup;
if portfolio=. or date=. then delete;
drop cgogroup;
run;

proc sort data=port1;
by date portfolio;
run;

proc means data=port1 noprint;
var excessret;
by date portfolio;
weight lagme;
output out=group_ret(drop=_type_ _Freq_) mean=excessret;
run;

proc sql;
create table group_ret
as select date,portfolio,excessret
from group_ret 
group by date
having count(portfolio)=10
order by date,portfolio;
quit;

proc sql;
create table group_ret_6
as select distinct a.date,(a.excessret-b.excessret) as excessret,66 as portfolio
from group_ret as a,group_ret as b
where a.date=b.date and a.portfolio=9 and b.portfolio=0;
quit;

data spread;
set group_ret group_ret_6;
run; 

proc sort data=spread;
by date;
where portfolio=0 or portfolio=9 or portfolio=66;
run;

data spread;
merge spread(in=in1) factors(keep=date &factor mktrf in=in2);
by date;
if in1 and in2;
run;

data tmp;
set spread;
proxy="&factor";
factor=&factor;
keep date excessret portfolio proxy factor;
run;

proc append data=tmp base=avgspread force;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret);
run;

proc sort data=spread out=date(keep=date) nodupkey;
by date;
run;

data firstdate;
set date;
if _n_=1;
run;

proc sort data=date;
by descending date;
run;

data lastdate;
set date;
if _n_=1;
run;

ods output parameterestimates=excessret;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 instruments / intonly;
 parms b0;
 excessret=b0;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessret(keep=portfolio parameter estimate tvalue) out=excessret(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b0';
run;

ods output parameterestimates=capmalpha;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog mktrf;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*mktrf;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmalpha(keep=portfolio parameter estimate tvalue) out=capmalpha(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b0';
run;
*	Note, the data is named as capmbeta, but it is actually the beta from single characteristics-based factor model, not from CAPM;
ods output parameterestimates=capmbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog &factor;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*&factor;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmbeta(keep=portfolio parameter estimate tvalue) out=capmbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

data f;
merge excessret(rename=(estimate=exret tvalue=tret)) capmbeta(rename=(estimate=beta tvalue=tbeta)) capmalpha(rename=(estimate=alpha tvalue=talpha));
by portfolio;
factor="&factor";
exret=exret*100;
alpha=alpha*100;
run;
*	add date	;
proc sql;
create table f
as select distinct a.*,b.date as firstdate format=yymmddn8.
from f as a,firstdate as b;
quit;

proc sql;
create table f
as select distinct a.*,b.date as lastdate format=yymmddn8.
from f as a,lastdate as b;
quit;

proc sort data=f;
by portfolio;
run;

proc append data=f base=table1 force;
run;

%mend betaport;

%macro suball(avgexclude=);

dm 'odsresults; clear';
proc datasets library=work nolist;
save macro all ff3_factors beta table1 avgspread factors spread2;
run;

proc sort data=avgspread;
by portfolio date;
where not missing(portfolio);
run;

proc means data=avgspread noprint;
where &avgexclude;
var excessret;
by portfolio date;
output out=spread(drop=_type_ _Freq_) mean=excessret;
run;

proc sort data=spread;
by date;
run;

data spread;
merge spread(in=in1) factors(keep=date mktrf in=in2);
by date;
if in1 and in2;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret);
run;

ods output parameterestimates=excessret;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 instruments / intonly;
 parms b0;
 excessret=b0;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessret(keep=portfolio parameter estimate tvalue) out=excessret(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b0';
run;

ods output parameterestimates=capmalpha;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog mktrf;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*mktrf;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmalpha(keep=portfolio parameter estimate tvalue) out=capmalpha(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b0';
run;

data f;
merge excessret(rename=(estimate=exret tvalue=tret)) capmalpha(rename=(estimate=alpha tvalue=talpha));
by portfolio;
exret=exret*100;
alpha=alpha*100;
run;

%mend suball;

%macro all(mbeta=,dbeta=);

proc datasets library=work nolist;
save macro all ff3_factors;
run;

proc sort data=mylib.allfactors out=factors;
by date;
run;

proc sort data=factors;
by date;
run;

data mbeta;
set &mbeta;
date=intnx('month',date,1,'e');
format date yymmddn8.;
drop beta1_umd beta1_pead beta1_perf freq_umd freq_pead freq_perf;
run;

proc sort data=mbeta nodupkey;
by permno date;
run;

data dbeta;
set &dbeta;
date=intnx('month',date,1,'e');
format date yymmddn8.;
run;

proc sort data=dbeta nodupkey;
by permno date;
run;

data beta;
merge mbeta(in=in1) dbeta(in=in2);
by permno date;
run;

proc sort data=beta nodupkey;
by permno date;
run;

data all1;
merge all(in=in1) beta(in=in3);
by permno date;
if in1 and in3;
run;

data table1;
format factor $24.;
format portfolio 8.0;
format exret 12.2;
format tret 8.2;
format beta 12.2;
format tbeta 8.2;
format alpha 12.2;
format talpha 8.2;
format firstdate yymmddn8.;
format lastdate yymmddn8.;
run;

data avgspread;
format proxy $24.;
format portfolio 8.0;
format excessret 12.6;
format factor 12.6;
format date yymmddn8.;
run;

%betaport(factor=smb_ff5);
%betaport(factor=hml);
%betaport(factor=rmw);
%betaport(factor=cma);
%betaport(factor=ia);
%betaport(factor=roe);
%betaport(factor=pmu);
%betaport(factor=qmj);
%betaport(factor=umd);
%betaport(factor=mgmt);
%betaport(factor=perf);
%betaport(factor=fin);
%betaport(factor=pead);

proc datasets library=work nolist;
save macro all ff3_factors beta table1 avgspread factors;
run;
*	correlation of portfolio spread as in T2 Panel A	;
%macro corr(var=);

proc sort data=avgspread out=spread1_&var(keep=date excessret rename=(excessret=&var));
by date;
where proxy="&var" and portfolio=66;
run;

%mend;

%corr(var=smb_ff5);
%corr(var=hml);
%corr(var=rmw);
%corr(var=cma);
%corr(var=ia);
%corr(var=roe);
%corr(var=pmu);
%corr(var=qmj);
%corr(var=umd);
%corr(var=mgmt);
%corr(var=perf);
%corr(var=fin);
%corr(var=pead);

*	excess return for average portfolio		;
/*	with smb		*/
proc sort data=avgspread;
by portfolio date;
where not missing(portfolio);
run;

proc means data=avgspread noprint;
var excessret;
by portfolio date;
output out=spread(drop=_type_ _Freq_) mean=excessret;
run;

proc sort data=spread;
by date;
run;

data spread;
merge spread(in=in1) factors(keep=date mktrf in=in2);
by date;
if in1 and in2;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret);
run;

ods output parameterestimates=excessret;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 instruments / intonly;
 parms b0;
 excessret=b0;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessret(keep=portfolio parameter estimate tvalue) out=excessret(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b0';
run;

ods output parameterestimates=capmalpha;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog mktrf;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*mktrf;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmalpha(keep=portfolio parameter estimate tvalue) out=capmalpha(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b0';
run;

data f;
merge excessret(rename=(estimate=exret tvalue=tret)) capmalpha(rename=(estimate=alpha tvalue=talpha));
by portfolio;
factor="avg";
exret=exret*100;
alpha=alpha*100;
run;

proc append data=f base=table1 force;run;

proc sort data=spread out=spread2(rename=(excessret=avg));
by date;
where portfolio=66;
run;

data spread2;
merge spread2 spread1_smb_ff5 spread1_hml spread1_rmw spread1_cma spread1_ia spread1_roe spread1_pmu spread1_qmj spread1_umd spread1_mgmt spread1_perf 
 spread1_fin spread1_pead;
by date;
run;
/*	export data to STATA to run regressions for average portfolio with fixed effects+cluster standard error	*/
data avgspread1;
set avgspread;
if portfolio=66 then portfolio=10;
run;

proc sort data=avgspread1;
by portfolio date proxy;
run;

proc export data=avgspread1(where=(not missing(proxy))) outfile= "&mainfolder.\Data_Cleaned\avgspread_all.dta" replace;run;
********************************************************;
*	 no smb;
%suball(avgexclude=(not(proxy in ("smb","smb_ff5"))));
data f;
set f;
factor="avg_nosmb";
run;

proc append data=f base=table1 force;run;

proc sort data=spread out=spread2_1(rename=(excessret=avg_nosmb));
by date;
where portfolio=66;
run;

data spread2;
merge spread2 spread2_1;
by date;
run;
*	Organize table based on reported format;
*	T2 Panel B, return and alpha;
proc sort data=table1(keep=factor portfolio exret tret alpha talpha) out=table1_1;
by factor;
where not missing(portfolio);
run;

data table1_t2b(drop=portfolio);
merge table1_1(drop=alpha talpha where=(portfolio=9) rename=(exret=exret9 tret=tret9))
table1_1(drop=alpha talpha where=(portfolio=0) rename=(exret=exret0 tret=tret0))
table1_1(where=(portfolio=66) rename=(exret=exret66 tret=tret66 alpha=alpha66 talpha=talpha66));
by factor;
run;

data table1_t2b;
retain factor exret9 tret9 exret0 tret0 exret66 tret66 alpha66 talpha66;
set table1_t2b;
run;

%reorder(indata=table1_t2b);

proc export data=table1_t2b outfile="&mainfolder.\Result\Table2_PanelB.csv" replace;run;
*	T2 Panel C, beta;
proc sort data=table1(keep=factor portfolio beta tbeta) out=table1_2;
by factor;
where not missing(portfolio);
run;

data table1_t2c(drop=portfolio);
merge table1_2(where=(portfolio=9) rename=(beta=beta9 tbeta=tbeta9))
table1_2(where=(portfolio=0) rename=(beta=beta0 tbeta=tbeta0))
table1_2(where=(portfolio=66) rename=(beta=beta66 tbeta=tbeta66));
by factor;
run;

data table1_t2c;
retain factor beta9 tbeta9 beta0 tbeta0 beta66 tbeta66;
set table1_t2c;
run;

%reorder(indata=table1_t2c);

proc export data=table1_t2c(where=(not missing(beta9))) outfile="&mainfolder.\Result\Table2_PanelC.csv" replace;run;

*	T2 Panel A;
proc corr data=spread2 outp=table2 noprint;
var smb_ff5 hml rmw cma ia roe pmu qmj umd mgmt perf fin pead avg avg_nosmb;
run;

proc export data=table2(where=(_type_='CORR')) outfile="&mainfolder.\Result\Table2_PanelA.csv" replace;run;

%mend all;

%let lags=5;

*dm log 'clear';
%let minmonth=36;
%let minday=45;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3);
*************************************************************************************************************;




/*	T3/4/9/11: Portfolios and predictive regressions	*/
%macro betaport(factor=);

dm 'odsresults; clear';

proc datasets library=work nolist;
save macro all ff3_factors bw all1 beta table1 avgspread alltable1;
run;

%if &factor=umd or &factor=perf %then %do;

proc sort data=all1(keep=date permno lagme beta1_&factor excessret lagexchcd lagshrcd lagprc freq_&factor) 
out=port1(keep=date permno lagme beta1_&factor excessret lagexchcd lagprc rename=(lagexchcd=exchcd)) nodupkey;
by date permno;
where not missing(beta1_&factor) and &startdate<=date<=&enddate and 1<=lagexchcd<=3 and 10<=lagshrcd<=11 and freq_&factor>=&minday;
run;

%end;

%else %if &factor=pead %then %do;

proc sort data=all1(keep=date permno lagme beta1_&factor excessret lagexchcd lagshrcd lagprc freq_&factor) 
out=port1(keep=date permno lagme beta1_&factor excessret lagexchcd lagprc rename=(lagexchcd=exchcd)) nodupkey;
by date permno;
where not missing(beta1_&factor) and max('01JUL1975'd,&startdate)<=date<=&enddate and 1<=lagexchcd<=3 and 10<=lagshrcd<=11 and freq_&factor>=&minday;
run;

%end;

%else %do;

proc sort data=all1(keep=date permno lagme beta1_&factor excessret lagexchcd lagshrcd lagprc freq_&factor) 
out=port1(keep=date permno lagme beta1_&factor excessret lagexchcd lagprc rename=(lagexchcd=exchcd)) nodupkey;
by date permno;
where not missing(beta1_&factor) and &startdate<=date<=&enddate and 1<=lagexchcd<=3 and 10<=lagshrcd<=11 and freq_&factor>=&minmonth;
run;

%end;

proc sort data=port1(drop=lagprc) nodupkey;
by date permno;
run;

proc univariate data=port1(where=(not missing(beta1_&factor) and 1<=exchcd<=3)) noprint;
by date;
var beta1_&factor;
output out=brkpt pctlpre=P_ pctlpts= 10 to 90 by 10;
run;

proc sort data=brkpt;
by date;
run;

data port1(drop=P_10 P_20 P_30 P_40 P_50 P_60 P_70 P_80 P_90);
merge port1 brkpt;
by date;
if not(beta1_&factor=.) and beta1_&factor<P_10 then cgogroup=0;
if P_10<=beta1_&factor<P_20 then cgogroup=1;
if P_20<=beta1_&factor<P_30 then cgogroup=2;
if P_30<=beta1_&factor<P_40 then cgogroup=3;
if P_40<=beta1_&factor<P_50 then cgogroup=4;
if P_50<=beta1_&factor<P_60 then cgogroup=5;
if P_60<=beta1_&factor<P_70 then cgogroup=6;
if P_70<=beta1_&factor<P_80 then cgogroup=7;
if P_80<=beta1_&factor<P_90 then cgogroup=8;
if beta1_&factor>=P_90 then cgogroup=9;
run;

data port1;
set port1;
portfolio=cgogroup;
if portfolio=. or date=. then delete;
drop cgogroup;
run;

proc sort data=port1;
by date portfolio;
run;

proc means data=port1 noprint;
var excessret;
by date portfolio;
weight lagme;
output out=group_ret(drop=_type_ _Freq_) mean=excessret;
run;

proc sql;
create table group_ret
as select date,portfolio,excessret
from group_ret 
group by date
having count(portfolio)=10
order by date,portfolio;
quit;

proc sql;
create table group_ret_6
as select distinct a.date,(a.excessret-b.excessret) as excessret,66 as portfolio
from group_ret as a,group_ret as b
where a.date=b.date and a.portfolio=9 and b.portfolio=0;
quit;

data spread;
set group_ret group_ret_6;
run; 

proc sort data=spread;
by date;
where portfolio=0 or portfolio=9 or portfolio=66;
run;

data spread;
merge spread(in=in1) ff3_factors(keep=date mktrf in=in2) bw(in=in3);
by date;
if in1 and in2 and in3;
run;

data tmp;
set spread;
proxy="&factor";
keep date excessret portfolio proxy;
run;

proc append data=tmp base=avgspread force;
run;
*	conditional return spread	;
proc sort data=spread nodupkey;
by portfolio high date;
where not missing(excessret) and not missing(high);
run;

proc sort data=spread out=date(keep=date) nodupkey;
by date;
run;

data firstdate;
set date;
if _n_=1;
run;

proc sort data=date;
by descending date;
run;

data lastdate;
set date;
if _n_=1;
run;

ods output parameterestimates=excessret;
ods listing close;
proc model data=spread;
 by portfolio high;
 endo excessret;
 instruments / intonly;
 parms b0;
 excessret=b0;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessret(keep=portfolio high parameter estimate tvalue) out=excessret(keep=portfolio high estimate tvalue);
by portfolio;
where parameter='b0';
run;

ods output parameterestimates=capmalpha;
ods listing close;
proc model data=spread;
 by portfolio high;
 endo excessret;
 exog mktrf;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*mktrf;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmalpha(keep=portfolio high parameter estimate tvalue) out=capmalpha(keep=portfolio high estimate tvalue);
by portfolio;
where parameter='b0';
run;

data spread;
set spread;
mktrf1=mktrf*high;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret);
run;

ods output parameterestimates=excessret1;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog high;
 instruments _exog_;
 parms b0 b2;
 excessret=b0+b2*high;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessret1(keep=portfolio parameter estimate tvalue) out=excessret1(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b2';
run;

ods output parameterestimates=capmalpha1;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog mktrf high mktrf1;
 instruments _exog_;
 parms b0 b1 b2 b3;
 excessret=b0+b1*mktrf+b2*high+b3*mktrf1;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmalpha1(keep=portfolio parameter estimate tvalue) out=capmalpha1(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b2';
run;

data f(drop=high);
merge excessret(where=(high=1) rename=(estimate=exret_high tvalue=tret_high)) capmalpha(where=(high=1) rename=(estimate=alpha_high tvalue=talpha_high)) 
 excessret(where=(high=0) rename=(estimate=exret_low tvalue=tret_low)) capmalpha(where=(high=0) rename=(estimate=alpha_low tvalue=talpha_low)) 
 excessret1(rename=(estimate=exret_diff tvalue=tret_diff)) capmalpha1(rename=(estimate=alpha_diff tvalue=talpha_diff));
by portfolio;
exret_high=exret_high*100;
alpha_high=alpha_high*100;
exret_low=exret_low*100;
alpha_low=alpha_low*100;
exret_diff=exret_diff*100;
alpha_diff=alpha_diff*100;
run;
*************************************;
*	predictive reg	;
proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret);
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*bw;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

ods output parameterestimates=capmbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw mktrf;
 instruments _exog_;
 parms b0 b1 b2;
 excessret=b0+b1*bw+b2*mktrf;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmbeta(keep=portfolio parameter estimate tvalue) out=capmbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

data f1;
merge excessretbeta(rename=(estimate=exret_beta tvalue=tret_beta)) capmbeta(rename=(estimate=capm_beta tvalue=tcapm_beta));
by portfolio;
factor="&factor";
exret_beta=exret_beta*100;*bw coefficient*100, to be consistent with unit of return/alpha just like excessret on the lhv is in % when run regression;
capm_beta=capm_beta*100;
run;

data f;
merge f f1;
by portfolio;
run;
*	add date	;
proc sql;
create table f
as select distinct a.*,b.date as firstdate format=yymmddn8.
from f as a,firstdate as b;
quit;

proc sql;
create table f
as select distinct a.*,b.date as lastdate format=yymmddn8.
from f as a,lastdate as b;
quit;

proc sort data=f;
by portfolio;
run;
*	Can ignore the SAS WARNING: Variable macro was not found on DATA file., because it is only used 
for organizing tables and I already assign value to it earlier;; 
proc append data=f base=table1 force;
run;

%mend betaport;

%macro suball(avgexclude=);

dm 'odsresults; clear';

proc datasets library=work nolist;
save macro all ff3_factors bw all1 beta table1 avgspread alltable1;
run;

proc means data=avgspread noprint;
where &avgexclude;
var excessret;
by portfolio date;
output out=spread(drop=_type_ _Freq_) mean=excessret;
run;

proc sort data=spread;
by date;
run;

data spread;
merge spread(in=in1) ff3_factors(keep=date mktrf in=in2) bw(in=in3);
by date;
if in1 and in2 and in3;
run;
*	conditional return spread	;
proc sort data=spread nodupkey;
by portfolio high date;
where not missing(excessret);
run;

ods output parameterestimates=excessret;
ods listing close;
proc model data=spread;
 by portfolio high;
 endo excessret;
 instruments / intonly;
 parms b0;
 excessret=b0;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessret(keep=portfolio high parameter estimate tvalue) out=excessret(keep=portfolio high estimate tvalue);
by portfolio;
where parameter='b0';
run;

ods output parameterestimates=capmalpha;
ods listing close;
proc model data=spread;
 by portfolio high;
 endo excessret;
 exog mktrf;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*mktrf;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmalpha(keep=portfolio high parameter estimate tvalue) out=capmalpha(keep=portfolio high estimate tvalue);
by portfolio;
where parameter='b0';
run;

data spread;
set spread;
mktrf1=mktrf*high;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret);
run;

ods output parameterestimates=excessret1;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog high;
 instruments _exog_;
 parms b0 b2;
 excessret=b0+b2*high;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessret1(keep=portfolio parameter estimate tvalue) out=excessret1(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b2';
run;

ods output parameterestimates=capmalpha1;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog mktrf high mktrf1;
 instruments _exog_;
 parms b0 b1 b2 b3;
 excessret=b0+b1*mktrf+b2*high+b3*mktrf1;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmalpha1(keep=portfolio parameter estimate tvalue) out=capmalpha1(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b2';
run;

data f(drop=high);
merge excessret(where=(high=1) rename=(estimate=exret_high tvalue=tret_high)) capmalpha(where=(high=1) rename=(estimate=alpha_high tvalue=talpha_high)) 
 excessret(where=(high=0) rename=(estimate=exret_low tvalue=tret_low)) capmalpha(where=(high=0) rename=(estimate=alpha_low tvalue=talpha_low)) 
 excessret1(rename=(estimate=exret_diff tvalue=tret_diff)) capmalpha1(rename=(estimate=alpha_diff tvalue=talpha_diff));
by portfolio;
exret_high=exret_high*100;
alpha_high=alpha_high*100;
exret_low=exret_low*100;
alpha_low=alpha_low*100;
exret_diff=exret_diff*100;
alpha_diff=alpha_diff*100;
run;
*************************************;
*	predictive reg	;
proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret);
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*bw;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

ods output parameterestimates=capmbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw mktrf;
 instruments _exog_;
 parms b0 b1 b2;
 excessret=b0+b1*bw+b2*mktrf;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmbeta(keep=portfolio parameter estimate tvalue) out=capmbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

data f1;
merge excessretbeta(rename=(estimate=exret_beta tvalue=tret_beta)) capmbeta(rename=(estimate=capm_beta tvalue=tcapm_beta));
by portfolio;
exret_beta=exret_beta*100;
capm_beta=capm_beta*100;
run;

data f;
merge f f1;
by portfolio;
run;

%mend suball;

%macro all(mbeta=,dbeta=,macro=);

proc datasets library=work nolist;
save macro all ff3_factors alltable1;
run;

data mbeta;
set &mbeta;
date=intnx('month',date,1,'e');
format date yymmddn8.;
drop beta1_umd beta1_pead beta1_perf freq_umd freq_pead freq_perf;
run;

proc sort data=mbeta nodupkey;
by permno date;
run;

data dbeta;
set &dbeta;
date=intnx('month',date,1,'e');
format date yymmddn8.;
run;

proc sort data=dbeta nodupkey;
by permno date;
run;

data beta;
merge mbeta(in=in1) dbeta(in=in2);
by permno date;
run;

data all1;
merge all(in=in1) beta(in=in3);
by permno date;
if in1 and in3;
run;

data bw;
set macro(keep=date &macro);
where not missing(&macro);
rename &macro=bw;
run;

proc rank data=bw out=bw group=2 ties=dense;
var bw;
ranks high;
run;

proc standard data=bw mean=0 std=1 out=bw;
var bw;
run;

proc sort data=bw;
by date;
run;

data table1;
format macro $24.;
format factor $24.;
format portfolio 8.0;
format exret_high 12.2;
format tret_high 8.2;
format alpha_high 12.2;
format talpha_high 8.2;
format exret_low 12.2;
format tret_low 8.2;
format alpha_low 12.2;
format talpha_low 8.2;
format exret_diff 12.2;
format tret_diff 8.2;
format alpha_diff 12.2;
format talpha_diff 8.2;
format exret_beta 12.2;
format tret_beta 8.2;
format capm_beta 12.2;
format tcapm_beta 8.2;
format firstdate yymmddn8.;
format lastdate yymmddn8.;
run;

data table1;
set table1;
macro="&macro";
run;

data avgspread;
format proxy $24.;
format portfolio 8.0;
format excessret 12.6;
format date yymmddn8.;
run;

%betaport(factor=smb_ff5);
%betaport(factor=hml);
%betaport(factor=rmw);
%betaport(factor=cma);
%betaport(factor=ia);
%betaport(factor=roe);
%betaport(factor=pmu);
%betaport(factor=qmj);
%betaport(factor=umd);
%betaport(factor=mgmt);
%betaport(factor=perf);
%betaport(factor=fin);
%betaport(factor=pead);

*	excess return for average portfolio		;
proc datasets library=work nolist;
save macro all ff3_factors bw all1 beta table1 avgspread alltable1;
run;

proc sort data=avgspread;
by portfolio date;
where not missing(portfolio);
run;

proc means data=avgspread noprint;
var excessret;
by portfolio date;
output out=spread(drop=_type_ _Freq_) mean=excessret;
run;

proc sort data=spread;
by date;
run;

data spread;
merge spread(in=in1) ff3_factors(keep=date mktrf in=in2) bw(in=in3);
by date;
if in1 and in2 and in3;
run;
*	conditional return spread	;
proc sort data=spread nodupkey;
by portfolio high date;
where not missing(excessret);
run;

ods output parameterestimates=excessret;
ods listing close;
proc model data=spread;
 by portfolio high;
 endo excessret;
 instruments / intonly;
 parms b0;
 excessret=b0;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessret(keep=portfolio high parameter estimate tvalue) out=excessret(keep=portfolio high estimate tvalue);
by portfolio;
where parameter='b0';
run;

ods output parameterestimates=capmalpha;
ods listing close;
proc model data=spread;
 by portfolio high;
 endo excessret;
 exog mktrf;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*mktrf;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmalpha(keep=portfolio high parameter estimate tvalue) out=capmalpha(keep=portfolio high estimate tvalue);
by portfolio;
where parameter='b0';
run;

data spread;
set spread;
mktrf1=mktrf*high;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret);
run;

ods output parameterestimates=excessret1;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog high;
 instruments _exog_;
 parms b0 b2;
 excessret=b0+b2*high;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessret1(keep=portfolio parameter estimate tvalue) out=excessret1(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b2';
run;

ods output parameterestimates=capmalpha1;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog mktrf high mktrf1;
 instruments _exog_;
 parms b0 b1 b2 b3;
 excessret=b0+b1*mktrf+b2*high+b3*mktrf1;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmalpha1(keep=portfolio parameter estimate tvalue) out=capmalpha1(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b2';
run;

data f(drop=high);
merge excessret(where=(high=1) rename=(estimate=exret_high tvalue=tret_high)) capmalpha(where=(high=1) rename=(estimate=alpha_high tvalue=talpha_high)) 
 excessret(where=(high=0) rename=(estimate=exret_low tvalue=tret_low)) capmalpha(where=(high=0) rename=(estimate=alpha_low tvalue=talpha_low)) 
 excessret1(rename=(estimate=exret_diff tvalue=tret_diff)) capmalpha1(rename=(estimate=alpha_diff tvalue=talpha_diff));
by portfolio;
exret_high=exret_high*100;
alpha_high=alpha_high*100;
exret_low=exret_low*100;
alpha_low=alpha_low*100;
exret_diff=exret_diff*100;
alpha_diff=alpha_diff*100;
run;
*************************************;
*	predictive reg	;
proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret);
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw;
 instruments _exog_;
 parms b0 b1;
 excessret=b0+b1*bw;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

ods output parameterestimates=capmbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw mktrf;
 instruments _exog_;
 parms b0 b1 b2;
 excessret=b0+b1*bw+b2*mktrf;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=capmbeta(keep=portfolio parameter estimate tvalue) out=capmbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

data f1;
merge excessretbeta(rename=(estimate=exret_beta tvalue=tret_beta)) capmbeta(rename=(estimate=capm_beta tvalue=tcapm_beta));
by portfolio;
factor="avg";
exret_beta=exret_beta*100;
capm_beta=capm_beta*100;
run;

data f;
merge f f1;
by portfolio;
run;

proc append data=f base=table1 force;run;
********************************************************;

*	 no smb;
%suball(avgexclude=(not(proxy in ("smb","smb_ff5"))));
data f;
set f;
factor="avg_nosmb";
run;

proc append data=f base=table1 force;run;

*	no behavior;
%suball(avgexclude=(not(proxy in ("mgmt","perf","fin","pead"))));
data f;
set f;
factor="avg_nobf";
run;

proc append data=f base=table1 force;run;

*	no behavior & qmj;
%suball(avgexclude=(not(proxy in ("mgmt","perf","fin","pead","qmj"))));
data f;
set f;
factor="avg_nobfq";
run;

proc append data=f base=table1 force;run;

*	no smb & behavior;
%suball(avgexclude=(not(proxy in ("smb","smb_ff5","mgmt","perf","fin","pead"))));
data f;
set f;
factor="avg_nosmbbf";
run;

proc append data=f base=table1 force;run;

*	no smb & behavior & qmj;
%suball(avgexclude=(not(proxy in ("smb","smb_ff5","mgmt","perf","fin","pead","qmj"))));
data f;
set f;
factor="avg_nosmbbfq";
run;

proc append data=f base=table1 force;run;

%if &macro=bw or &macro=mcsi or &macro=PLS or &macro=aaii or &macro=cb %then %do; 
*	Organize table based on reported format;
*	return;
proc sort data=table1(keep=factor portfolio exret_high tret_high exret_low tret_low exret_diff tret_diff) 
out=table1_1;
by factor;
where not missing(portfolio);
run;

data table1_t3a(drop=portfolio);
merge table1_1(where=(portfolio=0) rename=(exret_high=exret0_high tret_high=tret0_high exret_low=exret0_low tret_low=tret0_low exret_diff=exret0_diff tret_diff=tret0_diff))
table1_1(where=(portfolio=9) rename=(exret_high=exret9_high tret_high=tret9_high exret_low=exret9_low tret_low=tret9_low exret_diff=exret9_diff tret_diff=tret9_diff))
table1_1(where=(portfolio=66) rename=(exret_high=exret66_high tret_high=tret66_high exret_low=exret66_low tret_low=tret66_low exret_diff=exret66_diff tret_diff=tret66_diff));
by factor;
run;

data table1_t3a;
retain factor exret0_low tret0_low exret0_high tret0_high exret0_diff tret0_diff
exret9_low tret9_low exret9_high tret9_high exret9_diff tret9_diff
exret66_low tret66_low exret66_high tret66_high exret66_diff tret66_diff;
set table1_t3a;
run;

%reorder(indata=table1_t3a);
*	CAPM alpha;
proc sort data=table1(keep=factor portfolio alpha_high talpha_high alpha_low talpha_low alpha_diff talpha_diff) 
out=table1_2;
by factor;
where not missing(portfolio);
run;

data table1_t3b(drop=portfolio);
merge table1_2(where=(portfolio=0) rename=(alpha_high=alpha0_high talpha_high=talpha0_high alpha_low=alpha0_low talpha_low=talpha0_low alpha_diff=alpha0_diff talpha_diff=talpha0_diff))
table1_2(where=(portfolio=9) rename=(alpha_high=alpha9_high talpha_high=talpha9_high alpha_low=alpha9_low talpha_low=talpha9_low alpha_diff=alpha9_diff talpha_diff=talpha9_diff))
table1_2(where=(portfolio=66) rename=(alpha_high=alpha66_high talpha_high=talpha66_high alpha_low=alpha66_low talpha_low=talpha66_low alpha_diff=alpha66_diff talpha_diff=talpha66_diff));
by factor;
run;

data table1_t3b;
retain factor alpha0_low talpha0_low alpha0_high talpha0_high alpha0_diff talpha0_diff
alpha9_low talpha9_low alpha9_high talpha9_high alpha9_diff talpha9_diff
alpha66_low talpha66_low alpha66_high talpha66_high alpha66_diff talpha66_diff;
set table1_t3b;
run;

%reorder(indata=table1_t3b);
*	beta;
proc sort data=table1(keep=factor portfolio exret_beta tret_beta) out=table1_3;
by factor;
where not missing(portfolio);
run;

data table1_t4a(drop=portfolio);
merge table1_3(where=(portfolio=0) rename=(exret_beta=exret0_beta tret_beta=tret0_beta))
table1_3(where=(portfolio=9) rename=(exret_beta=exret9_beta tret_beta=tret9_beta))
table1_3(where=(portfolio=66) rename=(exret_beta=exret66_beta tret_beta=tret66_beta));
by factor;
run;

data table1_t4a;
retain factor exret0_beta tret0_beta exret9_beta tret9_beta exret66_beta tret66_beta;
set table1_t4a;
run;

%reorder(indata=table1_t4a);
*	beta controlling for mktrf;
proc sort data=table1(keep=factor portfolio capm_beta tcapm_beta) out=table1_4;
by factor;
where not missing(portfolio);
run;

data table1_t4b(drop=portfolio);
merge table1_4(where=(portfolio=0) rename=(capm_beta=capm0_beta tcapm_beta=tcapm0_beta))
table1_4(where=(portfolio=9) rename=(capm_beta=capm9_beta tcapm_beta=tcapm9_beta))
table1_4(where=(portfolio=66) rename=(capm_beta=capm66_beta tcapm_beta=tcapm66_beta));
by factor;
run;

data table1_t4b;
retain factor capm0_beta tcapm0_beta capm9_beta tcapm9_beta capm66_beta tcapm66_beta;
set table1_t4b;
run;

%reorder(indata=table1_t4b);

%end;

%else %do;

data table1;
set table1;
run;

proc append data=table1 base=alltable1 force;run;

%end;

%mend all;

%let lags=5;
%let startdate='01JUL1966'd;
%let enddate='31DEC2023'd;

*	Table 3: main test return, TIA1: CAPM alphas, & T4: beta. ;
*dm log 'clear';
%let minmonth=36;
%let minday=45;
*this alltable1 data is not used here but used in T11 macro placebo test, but since alltable1 is in the saved datasets step in the beginning of the macro, so 
I just create an empty data here ;
data alltable1; 
format macro $24.;
format factor $24.;
format portfolio 8.0;
format exret_high 12.2;
format tret_high 8.2;
format alpha_high 12.2;
format talpha_high 8.2;
format exret_low 12.2;
format tret_low 8.2;
format alpha_low 12.2;
format talpha_low 8.2;
format exret_diff 12.2;
format tret_diff 8.2;
format alpha_diff 12.2;
format talpha_diff 8.2;
format exret_beta 12.2;
format tret_beta 8.2;
format capm_beta 12.2;
format tcapm_beta 8.2;
format firstdate yymmddn8.;
format lastdate yymmddn8.;
run;
*	Table 3 Panel A & Table IA16 Panel A.1, return;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=bw);
proc export data=table1_t3a outfile="&mainfolder.\Result\Table3.csv" replace;run;
*	Table IA1, CAPM alpha & Table IA16 Panel A.2, the Avg and Avg_nosmb portfolios from this result is the last four rows of Table 3;
proc export data=table1_t3b outfile="&mainfolder.\Result\TableIA1.csv" replace;run;
*	Table 4 Panel A & Table IA16 Panel B.1, beta;
proc export data=table1_t4a outfile="&mainfolder.\Result\Table4_PanelA.csv" replace;run;
*	Table 4 Panel B & Table IA16 Panel B.2, beta controlling for mktrf;
proc export data=table1_t4b outfile="&mainfolder.\Result\Table4_PanelB.csv" replace;run;


*	Table 9 : robustness, other sentiment measures, Avg and Avg_nosmb from each output are reported in T9;
*dm log 'clear';
%let minmonth=36;
%let minday=45;
data alltable1;
format macro $24.;
format factor $24.;
format portfolio 8.0;
format exret_high 12.2;
format tret_high 8.2;
format alpha_high 12.2;
format talpha_high 8.2;
format exret_low 12.2;
format tret_low 8.2;
format alpha_low 12.2;
format talpha_low 8.2;
format exret_diff 12.2;
format tret_diff 8.2;
format alpha_diff 12.2;
format talpha_diff 8.2;
format exret_beta 12.2;
format tret_beta 8.2;
format capm_beta 12.2;
format tcapm_beta 8.2;
format firstdate yymmddn8.;
format lastdate yymmddn8.;
run;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=mcsi);*Panel A, starting from 197801;
proc export data=table1_t3a(where=(factor in ('avg','avg_nosmb'))) outfile="&mainfolder.\Result\Table9_PanelA.csv" replace;run;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=cb);*Panel B, data start from 197706;
proc export data=table1_t3a(where=(factor in ('avg','avg_nosmb'))) outfile="&mainfolder.\Result\Table9_PanelB.csv" replace;run;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=PLS);*Panel C;
proc export data=table1_t3a(where=(factor in ('avg','avg_nosmb'))) outfile="&mainfolder.\Result\Table9_PanelC.csv" replace;run;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=aaii);*Panel D, data start from 198707;
proc export data=table1_t3a(where=(factor in ('avg','avg_nosmb'))) outfile="&mainfolder.\Result\Table9_PanelD.csv" replace;run;


*	Table 11: robustness, macro placebo test;
*dm log 'clear';
%let minmonth=36;
%let minday=45;

data alltable1;
format macro $24.;
format factor $24.;
format portfolio 8.0;
format exret_high 12.2;
format tret_high 8.2;
format alpha_high 12.2;
format talpha_high 8.2;
format exret_low 12.2;
format tret_low 8.2;
format alpha_low 12.2;
format talpha_low 8.2;
format exret_diff 12.2;
format tret_diff 8.2;
format alpha_diff 12.2;
format talpha_diff 8.2;
format exret_beta 12.2;
format tret_beta 8.2;
format capm_beta 12.2;
format tcapm_beta 8.2;
format firstdate yymmddn8.;
format lastdate yymmddn8.;
run;
*options nonotes;
*	since dp dy ep de svar bm ltr tms dfy dfr + predict return, so use its negative version so that high state negative predicts return	;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=newdp);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=newdy);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=newep);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=newde);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=newsvar);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=newbm);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=ntis);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=tbl);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=lty);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=newltr);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=newtms);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=newdfy);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=newdfr);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=inflation);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=splus);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,macro=disp);
*options nonotes;
*	get the first/last date of each macro;
%macro alldate(macro=);

proc sql;
drop table a,a1,firstdate,lastdate;
quit;

proc sort data=macro(keep=date &macro) out=a(keep=date) nodupkey;
by date;
where not missing(&macro);
run;

data firstdate;
set a;
if _n_=1;
run;

proc sort data=a;
by descending date;
run;

data lastdate;
set a;
if _n_=1;
run;

proc sql;
create table a1
as select distinct a.date as firstdate format=yymmddn8.,b.date as lastdate format=yymmddn8.
from firstdate as a,lastdate as b;
quit;

data a1;
set a1;
macro="&macro";
if firstdate<'01JUL1966'd then firstdate='01JUL1966'd;*since the portfolio macro has startdate=196607, so set firstdate to be the later between 
the actual firstdate and 196607;
format firstdate yymmddn8.;
run;

proc append data=a1 base=alla force;run;

%mend alldate;

data alla;
format macro $24.;
format firstdate yymmddn8.;
format lastdate yymmddn8.;
run;

%alldate(macro=newdp);
%alldate(macro=newdy);
%alldate(macro=newep);
%alldate(macro=newde);
%alldate(macro=newsvar);
%alldate(macro=newbm);
%alldate(macro=ntis);
%alldate(macro=tbl);
%alldate(macro=lty);
%alldate(macro=newltr);
%alldate(macro=newtms);
%alldate(macro=newdfy);
%alldate(macro=newdfr);
%alldate(macro=inflation);
%alldate(macro=splus);
%alldate(macro=disp);

proc sort data=alla;
by macro;
where not missing(macro);
run;

data a(keep=macro n);
set alltable1;
where not missing(macro) or not missing(portfolio);
n=_n_;
run;

data a(keep=macro n);
set a;
where not missing(macro);
n=n+1;
run;

proc sort data=a;
by macro;
run;

data a;
merge a(in=in1) alla(in=in2);
by macro;
if in1 and in2;
run;

proc sort data=a;
by n;
run;

data b(drop=macro);
set alltable1(drop=firstdate lastdate);
where not missing(macro) or not missing(portfolio);
n=_n_;
run;

proc sort data=b;
by n;
run;

data alltable2;
merge a b;
by n;
run;

data alltable2;
set alltable2(drop=n);
where not missing(macro) or not missing(portfolio);
run;

data alltable2(drop=firstdate lastdate macro rename=(firstdate1=firstdate lastdate1=lastdate macro1=macro));
set alltable2;
retain macro1 firstdate1 lastdate1;
if not missing(macro) then macro1=macro;
if not missing(firstdate) then firstdate1=firstdate;
if not missing(lastdate) then lastdate1=lastdate;
format firstdate1 lastdate1 yymmddn8.;
run;

data alltable2;
retain macro;
set alltable2;
run;
*	Only keep avg related result and organize table based on reported format;
data alltable2_1;
set alltable2;
where upcase(factor) in ("AVG","AVG_NOSMB");
run;

data alltable2_1;
retain macro factor portfolio exret_high tret_high alpha_high talpha_high exret_low tret_low alpha_low talpha_low exret_diff tret_diff 
alpha_diff talpha_diff exret_beta tret_beta capm_beta tcapm_beta firstdate lastdate;
set alltable2_1;
run;
*	Table 11 Panel A, low state;
data table2_t11a(drop=portfolio);
set alltable2_1(keep=macro factor portfolio exret_low tret_low);
where not missing(portfolio) and portfolio=66;
run;

proc export data=table2_t11a outfile="&mainfolder.\Result\Table11_PanelA.csv" replace;run;
*	Table 11 Panel B, high state;
data table2_t11b(drop=portfolio);
set alltable2_1(keep=macro factor portfolio exret_high tret_high);
where not missing(portfolio) and portfolio=66;
run;

proc export data=table2_t11b outfile="&mainfolder.\Result\Table11_PanelB.csv" replace;run;
*	Table 11 Panel C, high-low state;
data table2_t11c(drop=portfolio);
set alltable2_1(keep=macro factor portfolio exret_diff tret_diff);
where not missing(portfolio) and portfolio=66;
run;

proc export data=table2_t11c outfile="&mainfolder.\Result\Table11_PanelC.csv" replace;run;

*************************************************************************************************************;
*****************************************************************************************************************;
**********************************************************************************************;
**********************************************************************************************;
**********************************************************************************************;



/*	Table 5/6/10/12: regressions	*/
%macro betaport(factor=);

dm 'odsresults; clear';

proc datasets library=work nolist;
save macro all ff3_factors all1 beta table1 avgspread;
run;

%if &factor=umd or &factor=perf %then %do;

proc sort data=all1(keep=date permno lagme beta1_&factor excessret lagexchcd lagshrcd lagprc freq_&factor) 
out=port1(keep=date permno lagme beta1_&factor excessret lagexchcd rename=(lagexchcd=exchcd)) nodupkey;
by date permno;
where not missing(beta1_&factor) and '01JUL1966'd<=date<=&enddate and 1<=lagexchcd<=3 and 10<=lagshrcd<=11 
and freq_&factor>=&minday;
run;

%end;

%else %if &factor=pead %then %do;

proc sort data=all1(keep=date permno lagme beta1_&factor excessret lagexchcd lagshrcd lagprc freq_&factor) 
out=port1(keep=date permno lagme beta1_&factor excessret lagexchcd rename=(lagexchcd=exchcd)) nodupkey;
by date permno;
where not missing(beta1_&factor) and '01JUL1975'd<=date<=&enddate and 1<=lagexchcd<=3 and 10<=lagshrcd<=11 
and freq_&factor>=&minday;
run;

%end;

%else %do;

proc sort data=all1(keep=date permno lagme beta1_&factor excessret lagexchcd lagshrcd lagprc freq_&factor) 
out=port1(keep=date permno lagme beta1_&factor excessret lagexchcd rename=(lagexchcd=exchcd)) nodupkey;
by date permno;
where not missing(beta1_&factor) and '01JUL1966'd<=date<=&enddate and 1<=lagexchcd<=3 and 10<=lagshrcd<=11 
and freq_&factor>=&minmonth;
run;

%end;

proc univariate data=port1(where=(not missing(beta1_&factor) and 1<=exchcd<=3)) noprint;
by date;
var beta1_&factor;
output out=brkpt pctlpre=P_ pctlpts= 10 to 90 by 10;
run;

proc sort data=brkpt;
by date;
run;

data port1(drop=P_10 P_20 P_30 P_40 P_50 P_60 P_70 P_80 P_90);
merge port1 brkpt;
by date;
if not(beta1_&factor=.) and beta1_&factor<P_10 then cgogroup=0;
if P_10<=beta1_&factor<P_20 then cgogroup=1;
if P_20<=beta1_&factor<P_30 then cgogroup=2;
if P_30<=beta1_&factor<P_40 then cgogroup=3;
if P_40<=beta1_&factor<P_50 then cgogroup=4;
if P_50<=beta1_&factor<P_60 then cgogroup=5;
if P_60<=beta1_&factor<P_70 then cgogroup=6;
if P_70<=beta1_&factor<P_80 then cgogroup=7;
if P_80<=beta1_&factor<P_90 then cgogroup=8;
if beta1_&factor>=P_90 then cgogroup=9;
run;

data port1;
set port1;
portfolio=cgogroup;
if portfolio=. or date=. then delete;
drop cgogroup;
run;

proc sort data=port1;
by date portfolio;
run;

proc means data=port1 noprint;
var excessret;
by date portfolio;
weight lagme;
output out=group_ret(drop=_type_ _Freq_) mean=excessret;
run;

proc sql;
create table group_ret
as select date,portfolio,excessret
from group_ret 
group by date
having count(portfolio)=10
order by date,portfolio;
quit;

proc sql;
create table group_ret_6
as select distinct a.date,(a.excessret-b.excessret) as excessret,66 as portfolio
from group_ret as a,group_ret as b
where a.date=b.date and a.portfolio=9 and b.portfolio=0;
quit;

data spread;
set group_ret group_ret_6;
run; 

proc sort data=spread;
by date;
where portfolio=0 or portfolio=9 or portfolio=66;
run;

data spread;
merge spread(in=in1) ff3_factors(keep=date mktrf in=in2);
by date;
if in1 and in2;
run;

data tmp;
set spread;
proxy="&factor";
keep date excessret portfolio proxy;
run;

proc append data=tmp base=avgspread force;
run;

%if &method=one %then %do;

data aa;
set macro(keep=date &macro &bw rename=(&macro=sv &bw=bw));
run;

proc sort data=aa nodupkey;
by date;
where not missing(sv) and not missing(bw);
run;

proc standard data=aa mean=0 std=1 out=aa;
var bw;
run;

proc sort data=aa nodupkey;
by date;
where not missing(sv) and not missing(bw);
run;

data spread;
merge spread(in=in1) aa(in=in3);
by date;
if in1 and in3;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret) and not missing(sv) and not missing(bw);
run;

proc sort data=spread out=date(keep=date) nodupkey;
by date;
run;

data firstdate;
set date;
if _n_=1;
run;

proc sort data=date;
by descending date;
run;

data lastdate;
set date;
if _n_=1;
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw sv;
 instruments _exog_;
 parms b0 b1 b2;
 excessret=b0+b1*bw+b2*sv;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

%end;

%if &method=multi %then %do;

data aa;
set macro(keep=date &bw dfy tms realrate cay ted inflation disp rename=(&bw=bw));
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay) and not missing(ted) and not missing(inflation)
and not missing(disp);
run;

proc standard data=aa mean=0 std=1 out=aa;
var bw;
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay) and not missing(ted) and not missing(inflation)
and not missing(disp);
run;

data spread;
merge spread(in=in1) aa(in=in3);
by date;
if in1 and in3;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret) and not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay) and not missing(ted) and not missing(inflation)
and not missing(disp);
run;

proc sort data=spread out=date(keep=date) nodupkey;
by date;
run;

data firstdate;
set date;
if _n_=1;
run;

proc sort data=date;
by descending date;
run;

data lastdate;
set date;
if _n_=1;
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw dfy tms realrate cay ted inflation disp;
 instruments _exog_;
 parms b0 b1 b2 b3 b4 b5 b6 b7 b8;
 excessret=b0+b1*bw+b2*dfy+b3*tms+b4*realrate+b5*cay+b6*ted+b7*inflation+b8*disp;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

%end;

%if &method=multi1 %then %do;

data aa;
set macro(keep=date &bw dfy tms realrate cay rename=(&bw=bw));
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay);
run;

proc standard data=aa mean=0 std=1 out=aa;
var bw;
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay);
run;

data spread;
merge spread(in=in1) aa(in=in3);
by date;
if in1 and in3;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret) and not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay);
run;

proc sort data=spread out=date(keep=date) nodupkey;
by date;
run;

data firstdate;
set date;
if _n_=1;
run;

proc sort data=date;
by descending date;
run;

data lastdate;
set date;
if _n_=1;
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw dfy tms realrate cay;
 instruments _exog_;
 parms b0 b1 b2 b3 b4 b5;
 excessret=b0+b1*bw+b2*dfy+b3*tms+b4*realrate+b5*cay;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

%end;

data f;
set excessretbeta(rename=(estimate=exret_beta tvalue=tret_beta));
factor="&factor";
exret_beta=exret_beta*100;
run;
*	add date	;
proc sql;
create table f
as select distinct a.*,b.date as firstdate format=yymmddn8.
from f as a,firstdate as b;
quit;

proc sql;
create table f
as select distinct a.*,b.date as lastdate format=yymmddn8.
from f as a,lastdate as b;
quit;

proc sort data=f;
by portfolio;
run;
*	Can ignore SAS WARNING: Variable bw was not found on DATA file. & WARNING: Variable macro was not found on DATA file. because they are only used 
for organizing tables and I already assign values to these two variables earlier;
proc append data=f base=table1 force;
run;

%mend betaport;

%macro suball(avgexclude=);

dm 'odsresults; clear';
proc datasets library=work nolist;
save macro all ff3_factors all1 beta table1 avgspread;
run;

proc sort data=avgspread;
by portfolio date;
run;

proc means data=avgspread noprint;
where &avgexclude;
var excessret;
by portfolio date;
output out=spread(drop=_type_ _Freq_) mean=excessret;
run;

proc sort data=spread;
by date;
run;

%if &method=one %then %do;

data aa;
set macro(keep=date &macro &bw rename=(&macro=sv &bw=bw));
run;

proc sort data=aa nodupkey;
by date;
where not missing(sv) and not missing(bw);
run;

proc standard data=aa mean=0 std=1 out=aa;
var bw;
run;

proc sort data=aa nodupkey;
by date;
where not missing(sv) and not missing(bw);
run;

data spread;
merge spread(in=in1) aa(in=in3);
by date;
if in1 and in3;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret) and not missing(sv) and not missing(bw);
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw sv;
 instruments _exog_;
 parms b0 b1 b2;
 excessret=b0+b1*bw+b2*sv;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

%end;

%if &method=multi %then %do;

data aa;
set macro(keep=date &bw dfy tms realrate cay ted inflation disp rename=(&bw=bw));
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay) and not missing(ted) and not missing(inflation)
and not missing(disp);
run;

proc standard data=aa mean=0 std=1 out=aa;
var bw;
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay) and not missing(ted) and not missing(inflation)
and not missing(disp);
run;

data spread;
merge spread(in=in1) aa(in=in3);
by date;
if in1 and in3;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret) and not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay) and not missing(ted) and not missing(inflation)
and not missing(disp);
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw dfy tms realrate cay ted inflation disp;
 instruments _exog_;
 parms b0 b1 b2 b3 b4 b5 b6 b7 b8;
 excessret=b0+b1*bw+b2*dfy+b3*tms+b4*realrate+b5*cay+b6*ted+b7*inflation+b8*disp;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

%end;

%if &method=multi1 %then %do;

data aa;
set macro(keep=date &bw dfy tms realrate cay rename=(&bw=bw));
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay);
run;

proc standard data=aa mean=0 std=1 out=aa;
var bw;
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay);
run;

data spread;
merge spread(in=in1) aa(in=in3);
by date;
if in1 and in3;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret) and not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay);
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw dfy tms realrate cay;
 instruments _exog_;
 parms b0 b1 b2 b3 b4 b5;
 excessret=b0+b1*bw+b2*dfy+b3*tms+b4*realrate+b5*cay;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

%end;

data f;
set excessretbeta(rename=(estimate=exret_beta tvalue=tret_beta));
exret_beta=exret_beta*100;
run;

%mend suball;

%macro all(mbeta=,dbeta=,bw=,macro=);

proc datasets library=work nolist;
save macro all ff3_factors;
run;

data mbeta;
set &mbeta;
date=intnx('month',date,1,'e');
format date yymmddn8.;
drop beta1_umd beta1_pead beta1_perf freq_umd freq_pead freq_perf;
run;

proc sort data=mbeta nodupkey;
by permno date;
run;

data dbeta;
set &dbeta;
date=intnx('month',date,1,'e');
format date yymmddn8.;
run;

proc sort data=dbeta nodupkey;
by permno date;
run;

data beta;
merge mbeta(in=in1) dbeta(in=in2);
by permno date;
run;

data all1;
merge all(in=in1) beta(in=in3);
by permno date;
if in1 and in3;
run;

data table1;
format bw $24.;
format macro $24.;
format factor $24.;
format portfolio 8.0;
format exret_beta 12.2;
format tret_beta 8.2;
format firstdate yymmddn8.;
format lastdate yymmddn8.;
run;

data table1;
set table1;
bw="&bw";
macro="&macro";
run;

data avgspread;
format proxy $24.;
format portfolio 8.0;
format excessret 12.6;
format date yymmddn8.;
run;

%betaport(factor=smb_ff5);
%betaport(factor=hml);
%betaport(factor=rmw);
%betaport(factor=cma);
%betaport(factor=ia);
%betaport(factor=roe);
%betaport(factor=pmu);
%betaport(factor=qmj);
%betaport(factor=umd);
%betaport(factor=mgmt);
%betaport(factor=perf);
%betaport(factor=fin);
%betaport(factor=pead);


proc datasets library=work nolist;
save macro all ff3_factors all1 beta table1 avgspread;
run;

proc sort data=avgspread;
by portfolio date;
run;

proc means data=avgspread noprint;
var excessret;
by portfolio date;
output out=spread(drop=_type_ _Freq_) mean=excessret;
run;

proc sort data=spread;
by date;
run;

%if &method=one %then %do;

data aa;
set macro(keep=date &macro &bw rename=(&macro=sv &bw=bw));
run;

proc sort data=aa nodupkey;
by date;
where not missing(sv) and not missing(bw);
run;

proc standard data=aa mean=0 std=1 out=aa;
var bw;
run;

proc sort data=aa nodupkey;
by date;
where not missing(sv) and not missing(bw);
run;

data spread;
merge spread(in=in1) aa(in=in3);
by date;
if in1 and in3;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret) and not missing(sv) and not missing(bw);
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw sv;
 instruments _exog_;
 parms b0 b1 b2;
 excessret=b0+b1*bw+b2*sv;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

%end;

%if &method=multi %then %do;

data aa;
set macro(keep=date &bw dfy tms realrate cay ted inflation disp rename=(&bw=bw));
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay) and not missing(ted) and not missing(inflation)
and not missing(disp);
run;

proc standard data=aa mean=0 std=1 out=aa;
var bw;
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay) and not missing(ted) and not missing(inflation)
and not missing(disp);
run;

data spread;
merge spread(in=in1) aa(in=in3);
by date;
if in1 and in3;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret) and not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay) and not missing(ted) and not missing(inflation)
and not missing(disp);
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw dfy tms realrate cay ted inflation disp;
 instruments _exog_;
 parms b0 b1 b2 b3 b4 b5 b6 b7 b8;
 excessret=b0+b1*bw+b2*dfy+b3*tms+b4*realrate+b5*cay+b6*ted+b7*inflation+b8*disp;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

%end;

%if &method=multi1 %then %do;

data aa;
set macro(keep=date &bw dfy tms realrate cay rename=(&bw=bw));
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay);
run;

proc standard data=aa mean=0 std=1 out=aa;
var bw;
run;

proc sort data=aa nodupkey;
by date;
where not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay);
run;

data spread;
merge spread(in=in1) aa(in=in3);
by date;
if in1 and in3;
run;

proc sort data=spread nodupkey;
by portfolio date;
where not missing(excessret) and not missing(bw) and not missing(dfy) and not missing(tms) and not missing(realrate) and not missing(cay);
run;

ods output parameterestimates=excessretbeta;
ods listing close;
proc model data=spread;
 by portfolio;
 endo excessret;
 exog bw dfy tms realrate cay;
 instruments _exog_;
 parms b0 b1 b2 b3 b4 b5;
 excessret=b0+b1*bw+b2*dfy+b3*tms+b4*realrate+b5*cay;
 fit excessret / gmm kernel=(bart,%eval(&lags+1),0) vardef=n; 
run;
quit;
ods listing;

proc sort data=excessretbeta(keep=portfolio parameter estimate tvalue) out=excessretbeta(keep=portfolio estimate tvalue);
by portfolio;
where parameter='b1';
run;

%end;

data f;
set excessretbeta(rename=(estimate=exret_beta tvalue=tret_beta));
factor="avg";
exret_beta=exret_beta*100;
run;

proc append data=f base=table1 force;run;
********************************************************;

*	 no smb;
%suball(avgexclude=(not(proxy in ("smb","smb_ff5"))));
data f;
set f;
factor="avg_nosmb";
run;

proc append data=f base=table1 force;run;

data a(keep=bw macro n);
set table1;
where not missing(macro);
n=_n_;
run;

proc sort data=a;
by n;
run;

data b;
set table1(drop=bw macro);
where not missing(factor);
n=_n_;
run;

proc sort data=b;
by n;
run;

data table2(drop=n);
merge a b;
by n;
run;

data table2(drop=macro bw rename=(macro1=macro bw1=bw));
set table2;
retain macro1 bw1;
if not missing(macro) then macro1=macro;
if not missing(bw) then bw1=bw;
run;

data table2;
retain bw macro factor portfolio exret_beta tret_beta firstdate lastdate;
set table2;
run;

proc sort data=table2 out=table2_3;
by bw macro factor;
where not missing(portfolio);
run;

data table2_t6(drop=portfolio);
merge table2_3(where=(portfolio=0) drop=firstdate lastdate rename=(exret_beta=exret0_beta tret_beta=tret0_beta))
table2_3(where=(portfolio=9) drop=firstdate lastdate rename=(exret_beta=exret9_beta tret_beta=tret9_beta))
table2_3(where=(portfolio=66) rename=(exret_beta=exret66_beta tret_beta=tret66_beta));
by bw macro factor;
run;

data table2_t6;
retain bw macro factor exret0_beta tret0_beta exret9_beta tret9_beta exret66_beta tret66_beta firstdate lastdate;
set table2_t6;
run;

%reorder(indata=table2_t6);

%if &macro=mktrf or &macro=multi or &macro=multi1 %then %do;

proc export data=table2_t6 outfile="&mainfolder.\Result\&bw._&macro..csv" replace;run;

%end;

%else %do;

proc export data=table2_t6(drop=firstdate lastdate where=(index(upcase(factor),"AVG")>=1)) outfile="&mainfolder.\Result\&bw._&macro..csv" replace;run;

%end;

%mend all;

%let lags=5;

%let minmonth=36;
%let minday=45;

*	Table 5: predictive regressions on lagged sentiment and macro variables (default premium, term premium, real interest rate & log wealth consumption
ratio)	;
*dm log 'clear';
%let method=multi1;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=multi1);


*	Table 6: contemporaneous regressions on change in sentiment	;
*dm log 'clear';
%let method=one;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=dbw,macro=mktrf);


*	Table 10: predictive regressions on lagged sentiment and additional macro variables	;
*dm log 'clear';
%let ted=ted;
%let method=multi;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=multi);


*	Table 12: predictive regressions on lagged sentiment controlling for additional state variables;
*dm log 'clear';
%let method=one;
*options nonotes;
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=dp);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=dy);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=ep);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=de);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=svar);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=bm);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=ntis);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=tbl);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=lty);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=ltr);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=tms);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=dfy);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=dfr);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=inflation);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=splus);
%all(mbeta=mylib.beta1_60,dbeta=mylib.beta2_3,bw=bw,macro=disp);
*options notes;
******************************************************************************************;

